This code is for the caregiver speech measures from transcripts used in the manuscript.
library(tidyverse)
library(Hmisc)
library(GGally)
library(ppcor)
library(gridExtra)
library(psych)
# Scatter plot with a linear-model fit line drawn on top.
# Background on this style of custom panel function:
# https://github.com/ggobi/ggally/issues/139
#   data, mapping: passed straight to ggplot()
#   ...:           forwarded to geom_smooth()
my_custom_smooth <- function(data, mapping, ...) {
  point_layer <- geom_point(alpha = .4, color = I("black"))
  fit_layer <- geom_smooth(method = "lm", color = I("blue"), ...)
  ggplot(data = data, mapping = mapping) + point_layer + fit_layer
}
# make theme_bw() the default ggplot2 theme for plots that do not add their own complete theme
theme_set(theme_bw())
# NOTE about periods of non-tCDS
# gemods refers to when there are designated start/end periods of other-directed speech (ODS); this was captured using gems (@G) using CHAT conventions
# kwalods refers to when ODS was transcribed at the utterance level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was captured per utterance using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods
# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'
# freq: token/type counts per id x activity x segment x speaker
# - drop the unnamed index column that read_csv() labels "...1"
# - drop utterance-level ODS (kwalods) and keep only speech == "all"
# - convert the grouping columns to factors; activity gets an explicit order
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>%
  dplyr::select(-"...1") %>%
  filter(activity != "kwalods", speech == "all") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "gemods")
    ),
    across(c(id, language, speech), factor)
  )
# mlu: mean length of utterance (words) per id x activity x segment x speaker
# factors are created before filtering, so `speech` keeps both of its levels
# even though only the "all" rows are retained
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac", "ods")
    ),
    across(c(id, language, speech), factor)
  ) %>%
  filter(speech == "all")
# chip
# caregivers only, so there is no speaker column
# ODS periods are excluded: these measures are about responsiveness to the
# child during periods of tCDS
chip <- read_csv("./data_demo_lena_transcripts/chip.csv") %>%
  filter(activity != "ods") %>%
  mutate(
    activity = factor(
      activity,
      levels = c("books", "play", "food", "routines", "conv", "ac")
    ),
    across(c(id, language), factor)
  )
# inspect column types and factor levels of the loaded freq data
str(freq)
## tibble [3,308 × 12] (S3: tbl_df/tbl/data.frame)
## $ id : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
## $ rectime : num [1:3308] 11923 11923 31360 31360 21499 ...
## $ activity : Factor w/ 7 levels "books","play",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ speaker : chr [1:3308] "CHI" "ADULTS" "CHI" "ADULTS" ...
## $ tokens : num [1:3308] 30 151 35 143 58 588 42 286 33 152 ...
## $ types : num [1:3308] 17 70 17 65 17 199 19 53 17 59 ...
## $ segment_num : num [1:3308] 12 12 15 15 2 2 11 11 5 5 ...
## $ language : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
## $ speech : Factor w/ 1 level "all": 1 1 1 1 1 1 1 1 1 1 ...
## $ dur_min : num [1:3308] 3.55 3.55 6.57 6.57 4.71 ...
## $ tokens_permin: num [1:3308] 8.46 42.57 5.32 21.75 12.31 ...
## $ types_permin : num [1:3308] 4.79 19.73 2.59 9.89 3.61 ...
# inspect column types and factor levels of the loaded mlu data
str(mlu)
## tibble [3,002 × 9] (S3: tbl_df/tbl/data.frame)
## $ id : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
## $ activity : Factor w/ 7 levels "books","play",..: 6 6 5 5 7 7 2 2 6 6 ...
## $ speaker : chr [1:3002] "ADULTS" "CHI" "ADULTS" "CHI" ...
## $ segment_num: num [1:3002] 2 2 2 2 2 2 2 2 3 3 ...
## $ words_sum : num [1:3002] 210 66 175 43 11 16 189 47 261 78 ...
## $ num_utt_sum: num [1:3002] 66 35 64 24 2 12 64 28 87 43 ...
## $ mlu_w : num [1:3002] 3.18 1.89 2.73 1.79 5.5 ...
## $ language : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
## $ speech : Factor w/ 2 levels "all","spont": 1 1 1 1 1 1 1 1 1 1 ...
# inspect column types and factor levels of the loaded chip data
str(chip)
## tibble [1,118 × 11] (S3: tbl_df/tbl/data.frame)
## $ activity : Factor w/ 6 levels "books","play",..: 6 5 2 6 5 4 6 5 4 2 ...
## $ id : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
## $ rectime : num [1:1118] 15242 15242 15242 14342 14342 ...
## $ total_adult_utt : num [1:1118] 68 64 65 91 43 13 50 8 65 127 ...
## $ total_child_utt : num [1:1118] 46 34 33 54 17 3 14 1 29 49 ...
## $ total_adult_resp : num [1:1118] 62 51 54 77 24 9 30 4 56 106 ...
## $ total_adult_imitexp : num [1:1118] 18 13 15 25 5 2 9 0 16 21 ...
## $ prop_adultresp_outof_childutt : num [1:1118] 1.35 1.5 1.64 1.43 1.41 ...
## $ prop_adult_imitexp_outof_childutt: num [1:1118] 0.391 0.382 0.455 0.463 0.294 ...
## $ language : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
## $ segment_num : num [1:1118] 2 2 2 3 3 3 4 4 4 5 ...
# FREQ: keep only the rows for the ADULTS speaker role
freq_adult <- filter(freq, speaker == "ADULTS")
# MLU: keep only the rows for the ADULTS speaker role
mlu_adult <- filter(mlu, speaker == "ADULTS")
Freq (tokens, types)
# relabel activity levels for plots
freq_adult <- freq_adult %>%
  mutate(
    activity = recode(
      activity,
      conv = "unst. conv.",
      ac = "adult-centered",
      gemods = "non_tcds"
    ),
    # maps capitalized language labels to lowercase; labels that are already
    # lowercase are left unchanged
    language = recode(language, English = "english", Spanish = "spanish")
  )
# plot for all speech: raw adult token counts by activity, one panel per language
ggplot(freq_adult, aes(x = activity, y = tokens, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language, ncol = 1) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )
# summarize data per participant and activity (all speech only):
# mean raw token and type counts within each id x activity group, reduced to
# one row per distinct id / language / activity combination
# (mutate() + distinct() leaves the result grouped by id and activity)
freq_adult_act <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(
    tokens_act = mean(tokens),
    types_act = mean(types)
  ) %>%
  distinct(id, language, activity, tokens_act, types_act)

# split by language (labels are lowercase at this point in the script)
freq_adult_act_en <- freq_adult_act %>% filter(language == "english")
freq_adult_act_sp <- freq_adult_act %>% filter(language == "spanish")

# descriptives: raw tokens by activity, English
# TRUE is spelled out because T is an ordinary variable that can be reassigned
describeBy(freq_adult_act_en$tokens_act, freq_adult_act_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 480.90076 272.79828 414.25000 147.0 1115.7500 968.7500 0.6058672
## X12 2 play 1 39 234.99060 132.96267 208.33333 42.5 714.0000 671.5000 1.0915721
## X13 3 food 1 31 145.78495 124.42440 89.66667 12.0 432.0000 420.0000 0.8051817
## X14 4 routines 1 32 135.71823 107.06498 106.25000 7.0 494.0000 487.0000 1.3675315
## X15 5 unst. conv. 1 43 152.50504 100.82982 114.50000 11.0 382.7500 371.7500 0.5656557
## X16 6 adult-centered 1 45 90.56519 60.38539 87.50000 4.0 295.0000 291.0000 0.9865811
## X17 7 non_tcds 1 45 154.69407 96.87180 152.00000 15.0 418.6667 403.6667 0.7289589
## kurtosis se
## X11 -0.7675539 58.160789
## X12 2.0768412 21.291067
## X13 -0.7341471 22.347282
## X14 2.1046767 18.926592
## X15 -0.7104771 15.376403
## X16 1.3754902 9.001722
## X17 0.1933267 14.440795
# observation-level min/max of raw adult tokens per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_en_minmax_tokens_raw <- freq_adult %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens, na.rm = TRUE),
    max = max(tokens, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_en_minmax_tokens_raw
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 english books 65 1474
## 2 english play 0 743
## 3 english unst. conv. 4 817
## 4 english routines 7 692
## 5 english food 1 630
## 6 english adult-centered 0 397
## 7 english non_tcds 0 945
# descriptives: raw tokens by activity, Spanish (per-participant means)
describeBy(freq_adult_act_sp$tokens_act, freq_adult_act_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 436.9875 212.49434 425.9167 77.000000 768.0000 691.0000 -0.1097107
## X12 2 play 1 37 180.9775 136.02617 123.3333 25.000000 571.0000 546.0000 0.9509845
## X13 3 food 1 31 155.3602 119.25669 128.0000 6.000000 445.0000 439.0000 0.7248658
## X14 4 routines 1 35 166.9405 133.27275 135.0000 4.000000 635.5000 631.5000 1.4170829
## X15 5 unst. conv. 1 43 139.6674 123.38751 106.6000 13.000000 754.6667 741.6667 2.9611793
## X16 6 adult-centered 1 45 106.6911 89.50469 71.4000 21.500000 426.5000 405.0000 1.7694977
## X17 7 non_tcds 1 45 113.3085 82.01389 103.5000 4.166667 323.5000 319.3333 0.9357912
## kurtosis se
## X11 -1.0449431 47.51518
## X12 0.1647279 22.36256
## X13 -0.5873931 21.41913
## X14 2.3099708 22.52721
## X15 11.7931488 18.81642
## X16 2.6909840 13.34257
## X17 0.1726090 12.22591
# observation-level min/max of raw adult tokens per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_sp_minmax_tokens_raw <- freq_adult %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens, na.rm = TRUE),
    max = max(tokens, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_sp_minmax_tokens_raw
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 spanish books 77 927
## 2 spanish play 15 802
## 3 spanish unst. conv. 5 1012
## 4 spanish routines 4 937
## 5 spanish food 6 566
## 6 spanish adult-centered 0 566
## 7 spanish non_tcds 0 795
# capitalize the language labels; code below filters on "English" / "Spanish"
freq_adult <- mutate(
  freq_adult,
  language = recode(language, english = "English", spanish = "Spanish")
)
# plot for all speech: adult tokens per minute by activity, one panel per language
boxplot_tokens_rate <- ggplot(freq_adult, aes(activity, tokens_permin, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7),
    panel.spacing = unit(4, "lines")
  ) +
  labs(x = "", y = "Tokens Rate")
# display the plot, as the original unassigned expression did
boxplot_tokens_rate
# pass the plot explicitly so the saved figure does not depend on last_plot()
ggsave("./figures/boxplot_tokens_rate2.pdf", plot = boxplot_tokens_rate,
       dpi = 300, width = 18, height = 8, units = "in")
# summarize data per participant and activity (all speech only):
# mean tokens/min and types/min within each id x activity group, reduced to
# one row per distinct id / language / activity combination
# (mutate() + distinct() leaves the result grouped by id and activity)
freq_adult_act_permin <- freq_adult %>%
  group_by(id, activity) %>%
  mutate(
    tokens_permin_act = mean(tokens_permin),
    types_permin_act = mean(types_permin)
  ) %>%
  distinct(id, language, activity, tokens_permin_act, types_permin_act)

# split by language (labels are capitalized at this point in the script)
freq_adult_act_permin_en <- freq_adult_act_permin %>% filter(language == "English")
freq_adult_act_permin_sp <- freq_adult_act_permin %>% filter(language == "Spanish")

# descriptives: tokens per minute by activity, English
# TRUE is spelled out because T is an ordinary variable that can be reassigned
describeBy(freq_adult_act_permin_en$tokens_permin_act,
           freq_adult_act_permin_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 97.05612 25.83716 99.10089 32.157566 139.35186 107.19429 -0.4652193
## X12 2 play 1 39 63.00664 24.36993 58.04526 19.809892 122.54285 102.73296 0.6362527
## X13 3 food 1 31 63.20593 33.34061 56.34873 17.818695 191.38756 173.56887 1.7407592
## X14 4 routines 1 32 70.52775 27.24599 66.26826 28.267219 157.18563 128.91841 1.1449756
## X15 5 unst. conv. 1 43 76.14884 35.36817 73.18092 10.266822 224.58716 214.32033 1.4451765
## X16 6 adult-centered 1 45 85.01633 73.45787 77.63141 19.605147 534.52116 514.91601 5.0378276
## X17 7 non_tcds 1 45 36.98633 21.17975 39.00190 4.063964 87.65849 83.59453 0.5225623
## kurtosis se
## X11 -0.09411375 5.508500
## X12 0.06339446 3.902312
## X13 4.60071546 5.988150
## X14 1.57756298 4.816457
## X15 5.13425782 5.393596
## X16 28.20995719 10.950453
## X17 -0.45795070 3.157290
# observation-level min/max of adult tokens per minute per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_en_minmax_tokens_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens_permin, na.rm = TRUE),
    max = max(tokens_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_en_minmax_tokens_rate
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 English books 21.7 149.
## 2 English play 0 183.
## 3 English unst. conv. 2.93 225.
## 4 English routines 21.4 157.
## 5 English food 12.3 191.
## 6 English adult-centered 0 535.
## 7 English non_tcds 0 167.
# descriptives: tokens per minute by activity, Spanish (per-participant means)
describeBy(freq_adult_act_permin_sp$tokens_permin_act,
           freq_adult_act_permin_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 69.70156 22.42204 71.71504 33.415438 114.94578 81.53034 0.11976814
## X12 2 play 1 37 51.27875 20.41805 52.27932 8.535673 118.63334 110.09767 0.62021093
## X13 3 food 1 31 37.39510 15.81841 37.57642 9.649060 71.37418 61.72512 0.07164798
## X14 4 routines 1 35 59.05829 22.87843 58.32433 13.861479 118.88422 105.02275 0.42422784
## X15 5 unst. conv. 1 43 59.56066 23.87057 57.76884 24.971949 137.97635 113.00440 0.98462727
## X16 6 adult-centered 1 45 56.17319 33.33514 53.78916 14.119577 161.15476 147.03518 1.17089383
## X17 7 non_tcds 1 45 31.05852 17.21041 28.27339 2.157086 71.16063 69.00355 0.29725236
## kurtosis se
## X11 -0.8333837 5.013720
## X12 1.4100577 3.356707
## X13 -0.7574487 2.841070
## X14 0.2797878 3.867161
## X15 1.0899492 3.640229
## X16 1.4051846 4.969310
## X17 -0.8203294 2.565577
# observation-level min/max of adult tokens per minute per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_sp_minmax_tokens_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(tokens_permin, na.rm = TRUE),
    max = max(tokens_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_sp_minmax_tokens_rate
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 Spanish books 33.4 115.
## 2 Spanish play 8.54 128.
## 3 Spanish unst. conv. 10.4 140.
## 4 Spanish routines 13.3 183.
## 5 Spanish food 9.65 91.1
## 6 Spanish adult-centered 0 343.
## 7 Spanish non_tcds 0 153.
# plot for all speech: raw adult type counts by activity, one panel per language
ggplot(freq_adult, aes(x = activity, y = types, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )
# descriptives: raw types by activity, English (per-participant means)
describeBy(freq_adult_act_en$types_act, freq_adult_act_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 143.90227 66.70063 116.50000 56.0 277.7500 221.7500 0.4732497
## X12 2 play 1 39 85.90256 36.92368 81.50000 27.5 197.0000 169.5000 0.8994632
## X13 3 food 1 31 64.56989 42.01631 50.00000 12.0 177.0000 165.0000 0.6751602
## X14 4 routines 1 32 58.62083 31.98695 52.00000 7.0 144.0000 137.0000 0.6238625
## X15 5 unst. conv. 1 43 65.25039 36.72048 53.33333 10.0 163.0000 153.0000 0.6798436
## X16 6 adult-centered 1 45 44.60630 24.26759 43.40000 4.0 129.0000 125.0000 0.8473480
## X17 7 non_tcds 1 45 79.91481 44.48101 71.33333 10.5 199.8333 189.3333 0.4831218
## kurtosis se
## X11 -1.12778313 14.220623
## X12 1.00289109 5.912521
## X13 -0.46931520 7.546351
## X14 0.06720687 5.654547
## X15 -0.18936585 5.599821
## X16 1.60694389 3.617598
## X17 -0.42847167 6.630837
# observation-level min/max of raw adult types per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_en_minmax_types_raw <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types, na.rm = TRUE),
    max = max(types, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_en_minmax_types_raw
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 English books 38 369
## 2 English play 0 253
## 3 English unst. conv. 4 267
## 4 English routines 7 246
## 5 English food 1 181
## 6 English adult-centered 0 159
## 7 English non_tcds 0 351
# descriptives: raw types by activity, Spanish (per-participant means)
describeBy(freq_adult_act_sp$types_act, freq_adult_act_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 144.82500 64.74266 138.50000 44.0 268.5000 224.5000 0.2285406
## X12 2 play 1 37 62.33784 35.84100 54.66667 14.0 157.0000 143.0000 0.6215909
## X13 3 food 1 31 65.50538 36.39642 69.00000 5.0 133.0000 128.0000 0.1241942
## X14 4 routines 1 35 65.69762 33.60470 65.00000 4.0 141.5000 137.5000 0.3473994
## X15 5 unst. conv. 1 43 59.88527 32.98391 52.00000 8.0 160.6667 152.6667 0.8136958
## X16 6 adult-centered 1 45 48.42926 26.92731 40.60000 15.0 121.0000 106.0000 1.0463985
## X17 7 non_tcds 1 45 59.29000 36.12099 55.16667 3.5 137.8333 134.3333 0.5162437
## kurtosis se
## X11 -1.03063121 14.476900
## X12 -0.30729538 5.892225
## X13 -1.22818040 6.536989
## X14 -0.46882830 5.680232
## X15 0.38015627 5.030000
## X16 0.08898838 4.014086
## X17 -0.42162311 5.384600
# observation-level min/max of raw adult types per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_sp_minmax_types_raw <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types, na.rm = TRUE),
    max = max(types, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_sp_minmax_types_raw
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 Spanish books 44 361
## 2 Spanish play 7 186
## 3 Spanish unst. conv. 5 219
## 4 Spanish routines 4 175
## 5 Spanish food 5 166
## 6 Spanish adult-centered 0 212
## 7 Spanish non_tcds 0 289
# plot for all speech: adult types per minute by activity, one panel per language
boxplot_types_rate <- ggplot(freq_adult, aes(activity, types_permin, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Types Rate")
# display the plot, as the original unassigned expression did
boxplot_types_rate
# pass the plot explicitly so the saved figure does not depend on last_plot()
ggsave("./figures/boxplot_types_rate2.pdf", plot = boxplot_types_rate,
       dpi = 300, width = 18, height = 8, units = "in")
# descriptives: types per minute by activity, English (per-participant means)
describeBy(freq_adult_act_permin_en$types_permin_act,
           freq_adult_act_permin_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 32.06383 9.75467 32.15751 14.809274 48.75449 33.94522 0.1938847
## X12 2 play 1 39 27.07891 12.32556 24.04612 12.245120 66.31838 54.07326 1.5066690
## X13 3 food 1 31 39.82161 34.99837 30.28977 9.280570 191.38756 182.10699 2.7880293
## X14 4 routines 1 32 41.75257 25.57367 35.05380 14.409224 157.18563 142.77640 2.8301947
## X15 5 unst. conv. 1 43 41.83628 27.40421 36.04217 5.648173 176.14679 170.49862 2.8015041
## X16 6 adult-centered 1 45 61.14224 75.86224 46.13497 10.605640 534.52116 523.91552 5.3822148
## X17 7 non_tcds 1 45 20.66782 11.41460 20.40530 2.392211 48.90393 46.51172 0.5745082
## kurtosis se
## X11 -1.0590251 2.079703
## X12 2.2094453 1.973670
## X13 8.8782125 6.285893
## X14 10.1753362 4.520830
## X15 10.9712805 4.179103
## X16 30.7217214 11.308874
## X17 -0.3456499 1.701588
# observation-level min/max of adult types per minute per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_en_minmax_types_rate <- freq_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types_permin, na.rm = TRUE),
    max = max(types_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_en_minmax_types_rate
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 English books 9.89 64.4
## 2 English play 0 149.
## 3 English unst. conv. 2.93 176.
## 4 English routines 11.2 157.
## 5 English food 8.50 191.
## 6 English adult-centered 0 535.
## 7 English non_tcds 0 92.2
# descriptives: types per minute by activity, Spanish (per-participant means)
describeBy(freq_adult_act_permin_sp$types_permin_act,
           freq_adult_act_permin_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 25.19438 9.378178 25.62804 10.866809 49.60044 38.73363 0.6192718
## X12 2 play 1 37 21.98845 11.038304 19.62847 4.405509 66.43467 62.02916 1.7173151
## X13 3 food 1 31 19.09226 8.249124 17.82146 8.576942 39.99831 31.42137 0.8860032
## X14 4 routines 1 35 30.57141 14.693167 28.80377 9.082726 73.61036 64.52764 1.1037506
## X15 5 unst. conv. 1 43 32.41628 16.819376 26.79962 10.061461 100.19711 90.13565 1.7190575
## X16 6 adult-centered 1 45 38.00249 28.213513 33.91582 7.901010 137.53863 129.63762 1.8238166
## X17 7 non_tcds 1 45 18.23642 9.936926 16.33417 1.666453 43.46532 41.79886 0.5847300
## kurtosis se
## X11 0.07872493 2.097024
## X12 4.88857397 1.814686
## X13 -0.11762887 1.481586
## X14 1.11375487 2.483598
## X15 4.05465209 2.564931
## X16 3.43928396 4.205822
## X17 -0.16215447 1.481309
# observation-level min/max of adult types per minute per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
freq_adult_sp_minmax_types_rate <- freq_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(types_permin, na.rm = TRUE),
    max = max(types_permin, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
freq_adult_sp_minmax_types_rate
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 Spanish books 10.9 49.6
## 2 Spanish play 4.41 71.2
## 3 Spanish unst. conv. 6.68 100.
## 4 Spanish routines 8.17 171.
## 5 Spanish food 5.38 55.7
## 6 Spanish adult-centered 0 343.
## 7 Spanish non_tcds 0 130.
MLU
# relabel activity and language levels for plots
mlu_adult <- mlu_adult %>%
  mutate(
    activity = recode(
      activity,
      conv = "unst. conv.",
      ac = "adult-centered",
      ods = "non_tcds"
    ),
    language = recode(language, english = "English", spanish = "Spanish")
  )
# plot: adult MLU in words by activity, one panel per language
boxplot_mluw <- ggplot(mlu_adult, aes(activity, mlu_w, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey", "black")
  ) +
  facet_wrap(~ language) +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "MLUw")
# display the plot, as the original unassigned expression did
boxplot_mluw
# pass the plot explicitly so the saved figure does not depend on last_plot()
ggsave("./figures/boxplot_mluw2.pdf", plot = boxplot_mluw,
       dpi = 300, width = 18, height = 8, units = "in")
# summarize data per participant and activity (all speech only):
# mean MLUw within each id x activity group, reduced to one row per distinct
# id / language / activity combination
# (mutate() + distinct() leaves the result grouped by id and activity)
mlu_adult_act <- mlu_adult %>%
  group_by(id, activity) %>%
  mutate(mlu_w_act = mean(mlu_w)) %>%
  distinct(id, language, activity, mlu_w_act)

# split by language
mlu_adult_act_en <- mlu_adult_act %>% filter(language == "English")
mlu_adult_act_sp <- mlu_adult_act %>% filter(language == "Spanish")

# descriptives: MLUw by activity, English
# TRUE is spelled out because T is an ordinary variable that can be reassigned
describeBy(mlu_adult_act_en$mlu_w_act, mlu_adult_act_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 4.860163 1.2386474 4.849059 2.843305 7.287671 4.444366 0.38589952
## X12 2 play 1 39 3.544910 0.7799806 3.361351 2.117647 5.428571 3.310924 0.37501840
## X13 3 food 1 31 3.508170 0.8406000 3.268261 2.125000 5.328869 3.203869 0.35194344
## X14 4 routines 1 32 3.665860 0.7357554 3.610858 2.200000 5.670455 3.470455 0.54603786
## X15 5 unst. conv. 1 43 3.763843 0.8423445 3.772650 1.500000 5.769231 4.269231 -0.32572952
## X16 6 adult-centered 1 45 3.524902 0.7379635 3.407890 2.248271 5.668605 3.420334 0.87918296
## X17 7 non_tcds 1 45 4.150646 0.8011976 4.165139 2.542017 5.770249 3.228232 0.04840642
## kurtosis se
## X11 -0.76500796 0.2640805
## X12 -0.52053041 0.1248969
## X13 -0.75764546 0.1509762
## X14 0.06066605 0.1300644
## X15 0.41938715 0.1284563
## X16 0.83025135 0.1100091
## X17 -0.82614709 0.1194355
# observation-level min/max of adult MLUw per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
mlu_adult_en_minmax <- mlu_adult %>%
  filter(language == "English") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(mlu_w, na.rm = TRUE),
    max = max(mlu_w, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
mlu_adult_en_minmax
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 English adult-centered 1 9
## 2 English unst. conv. 1 6.67
## 3 English non_tcds 1 8
## 4 English play 1.33 5.69
## 5 English routines 2.2 8
## 6 English books 1.51 8.73
## 7 English food 1 6.06
# descriptives: MLUw by activity, Spanish (per-participant means)
describeBy(mlu_adult_act_sp$mlu_w_act, mlu_adult_act_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 3.822411 1.3037314 3.758225 1.864407 6.566855 4.702448 0.4630879
## X12 2 play 1 37 2.698639 0.7463847 2.542857 1.422360 4.545918 3.123558 0.4675977
## X13 3 food 1 31 2.772200 0.6975939 2.746032 1.391304 4.230769 2.839465 -0.0686621
## X14 4 routines 1 35 3.029701 0.7381221 3.000000 1.791667 4.485947 2.694281 0.2233092
## X15 5 unst. conv. 1 43 3.054863 0.5741442 2.821722 2.004357 4.589286 2.584928 0.7193086
## X16 6 adult-centered 1 45 2.756766 0.6920525 2.682044 1.666667 5.019476 3.352810 0.8007356
## X17 7 non_tcds 1 45 3.499380 0.7220236 3.461212 1.890000 5.202889 3.312889 0.1197576
## kurtosis se
## X11 -0.7168780 0.29152319
## X12 -0.4561800 0.12270488
## X13 -0.5377128 0.12529156
## X14 -1.0421283 0.12476540
## X15 -0.2066920 0.08755616
## X16 0.6194120 0.10316510
## X17 -0.2901765 0.10763292
# observation-level min/max of adult MLUw per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
mlu_adult_sp_minmax <- mlu_adult %>%
  filter(language == "Spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(mlu_w, na.rm = TRUE),
    max = max(mlu_w, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
mlu_adult_sp_minmax
## # A tibble: 7 × 4
## # Groups: activity, language [7]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 Spanish books 1.86 8.36
## 2 Spanish play 1.21 6.48
## 3 Spanish unst. conv. 1 5.33
## 4 Spanish routines 1.58 5
## 5 Spanish food 1.39 4.32
## 6 Spanish adult-centered 1 6.22
## 7 Spanish non_tcds 1 7.9
CHIP (responses, imitations/expansions; these are utterances that follow a child’s utterance, within a 5 utterance window)
# create a relabelled copy of chip for plotting; chip itself keeps its
# original activity and language labels
chip2 <- chip %>%
  mutate(
    activity = recode(activity, conv = "unst. conv.", ac = "adult-centered"),
    language = recode(language, english = "English", spanish = "Spanish")
  )
# plot - total adult responses by activity, one panel per language
ggplot(chip2, aes(x = activity, y = total_adult_resp, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language, ncol = 1) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey")
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )
# plot - adult responses relative to child utterances, by activity and language
boxplot_prop_resp <- ggplot(chip2, aes(activity, prop_adultresp_outof_childutt, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey")
  ) +
  facet_wrap(~ language) +
  geom_hline(yintercept = 1) + # caregiver and child = equal utts
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Prop Responses")
# display the plot, as the original unassigned expression did
boxplot_prop_resp
# pass the plot explicitly so the saved figure does not depend on last_plot()
ggsave("./figures/boxplot_prop_resp2.pdf", plot = boxplot_prop_resp,
       dpi = 300, width = 18, height = 8, units = "in")
# summarize data per participant and activity:
# mean response and imitation/expansion proportions within each id x activity
# group, reduced to one row per distinct id / language / activity combination
# built from chip (not chip2), so activity and language keep their raw labels
# (mutate() + distinct() leaves the result grouped by id and activity)
chip_act <- chip %>%
  group_by(id, activity) %>%
  mutate(
    prop_resp_act = mean(prop_adultresp_outof_childutt),
    prop_imitexp_act = mean(prop_adult_imitexp_outof_childutt)
  ) %>%
  distinct(id, language, activity, prop_resp_act, prop_imitexp_act)

# split by language
chip_act_en <- chip_act %>% filter(language == "english")
chip_act_sp <- chip_act %>% filter(language == "spanish")

# descriptives: proportion of responses by activity, English
# TRUE is spelled out because T is an ordinary variable that can be reassigned
describeBy(chip_act_en$prop_resp_act, chip_act_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 2.378070 0.7766933 2.342175 1.1864443 4.366667 3.180222 0.69916859
## X12 2 play 1 36 2.074418 0.6357208 1.925870 1.1301724 4.666667 3.536494 1.95571413
## X13 3 food 1 29 1.914779 0.5461100 1.944444 0.8141026 3.066667 2.252564 0.14337467
## X14 4 routines 1 28 1.856904 0.5897762 1.777778 0.8000000 3.333333 2.533333 0.57225823
## X15 5 conv 1 36 1.866984 0.5430045 1.849124 0.6250000 2.933333 2.308333 -0.03709437
## X16 6 ac 1 22 1.855095 0.5401576 1.815131 0.9909502 3.248677 2.257727 0.54471173
## kurtosis se
## X11 0.03584900 0.16559157
## X12 5.35965871 0.10595346
## X13 -0.49085257 0.10141009
## X14 -0.07450000 0.11145723
## X15 -0.55933392 0.09050074
## X16 -0.07700799 0.11516198
# observation-level min/max of the response proportion per activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
chip_adult_en_minmax_propresp <- chip %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adultresp_outof_childutt, na.rm = TRUE),
    max = max(prop_adultresp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
chip_adult_en_minmax_propresp
## # A tibble: 6 × 4
## # Groups: activity, language [6]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 english ac 0 5
## 2 english conv 0.571 5
## 3 english play 0.667 4.67
## 4 english routines 0.5 3.8
## 5 english books 0.859 5
## 6 english food 0 5
# descriptives: proportion of responses by activity, Spanish (per-participant means)
describeBy(chip_act_sp$prop_resp_act, chip_act_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 2.125135 0.4832411 2.109579 1.0617284 3.055556 1.993827 -0.3732188
## X12 2 play 1 31 1.927563 0.6742332 1.750000 0.8125000 3.500000 2.687500 0.5481160
## X13 3 food 1 30 1.762135 0.7100370 1.620192 0.3333333 3.314815 2.981481 0.3986445
## X14 4 routines 1 31 1.904239 0.5335071 1.900000 1.0000000 3.166667 2.166667 0.3703352
## X15 5 conv 1 35 1.818201 0.5605011 1.683208 0.8823529 3.119048 2.236695 0.6188577
## X16 6 ac 1 25 1.589711 0.6019761 1.524806 0.7083333 3.750000 3.041667 1.6448000
## kurtosis se
## X11 -0.2278578 0.10805599
## X12 -0.5407181 0.12109586
## X13 -0.3813870 0.12963442
## X14 -0.4500460 0.09582070
## X15 -0.4151453 0.09474197
## X16 4.0945489 0.12039523
# observation-level min/max of the response proportion per activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
chip_adult_sp_minmax_propresp <- chip %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adultresp_outof_childutt, na.rm = TRUE),
    max = max(prop_adultresp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
chip_adult_sp_minmax_propresp
## # A tibble: 6 × 4
## # Groups: activity, language [6]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 spanish ac 0 5
## 2 spanish conv 0.7 5
## 3 spanish play 0.765 5
## 4 spanish food 0.333 4
## 5 spanish routines 1 5
## 6 spanish books 1.06 3.06
# plot - total adult imitations/expansions by activity, one panel per language
ggplot(chip2, aes(x = activity, y = total_adult_imitexp, fill = activity)) +
  geom_boxplot() +
  geom_jitter(alpha = .2) +
  facet_wrap(~ language, ncol = 1) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey")
  ) +
  labs(x = "", y = "") +
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  )
# plot - adult imitations/expansions relative to child utterances,
# by activity and language
boxplot_prop_imitexp <- ggplot(chip2, aes(activity, prop_adult_imitexp_outof_childutt, fill = activity)) +
  theme_classic() +
  geom_boxplot() +
  geom_jitter(alpha = .3) +
  scale_fill_manual(
    values = c("darkviolet", "firebrick1", "green2", "dodgerblue1",
               "darkgoldenrod1", "darkgrey")
  ) +
  facet_wrap(~ language) +
  geom_hline(yintercept = 1) + # caregiver and child = equal utts
  theme(
    legend.position = "none",
    text = element_text(size = 35),
    axis.text.x = element_text(angle = 20, hjust = .7)
  ) +
  labs(x = "", y = "Prop Imitations/Expansions")
# display the plot, as the original unassigned expression did
boxplot_prop_imitexp
# pass the plot explicitly so the saved figure does not depend on last_plot()
ggsave("./figures/boxplot_prop_imitexp2.pdf", plot = boxplot_prop_imitexp,
       dpi = 300, width = 18, height = 8, units = "in")
# descriptives: proportion of imitations/expansions by activity, English
# (per-participant means)
describeBy(chip_act_en$prop_imitexp_act, chip_act_en$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 22 0.4822402 0.2032150 0.4606055 0.07142857 0.8000000 0.7285714 -0.17741053
## X12 2 play 1 36 0.4082746 0.2249434 0.4011154 0.00000000 1.1157407 1.1157407 0.96845872
## X13 3 food 1 29 0.3716799 0.1958653 0.3809524 0.00000000 1.0000000 1.0000000 0.91498868
## X14 4 routines 1 28 0.3527558 0.1764344 0.3737981 0.00000000 0.6746032 0.6746032 -0.08511947
## X15 5 conv 1 36 0.3490170 0.1925299 0.3014535 0.00000000 0.8888889 0.8888889 0.68222826
## X16 6 ac 1 22 0.3648964 0.1674221 0.3302305 0.18972991 0.8666667 0.6769368 1.33927429
## kurtosis se
## X11 -0.8102586 0.04332558
## X12 1.4668965 0.03749056
## X13 1.7868059 0.03637128
## X14 -0.7390449 0.03334296
## X15 0.1562880 0.03208831
## X16 1.3695376 0.03569452
# observation-level min/max of the imitation/expansion proportion per
# activity (English)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
chip_adult_en_minmax_propimitexp <- chip %>%
  filter(language == "english") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE),
    max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
chip_adult_en_minmax_propimitexp
## # A tibble: 6 × 4
## # Groups: activity, language [6]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 english ac 0 4
## 2 english conv 0 2
## 3 english play 0 1.38
## 4 english routines 0 1
## 5 english books 0.0714 1
## 6 english food 0 1
# descriptives: proportion of imitations/expansions by activity, Spanish
# (per-participant means)
describeBy(chip_act_sp$prop_imitexp_act, chip_act_sp$activity,
           mat = TRUE, fast = TRUE)
## item group1 vars n mean sd median min max range skew
## X11 1 books 1 20 0.4052845 0.2414756 0.3428358 0.0625 1.0000000 0.9375000 0.7095279
## X12 2 play 1 31 0.3536352 0.2224772 0.3333333 0.0000 1.1700000 1.1700000 1.4354164
## X13 3 food 1 30 0.3333968 0.2401772 0.3088362 0.0000 1.0000000 1.0000000 0.9717588
## X14 4 routines 1 31 0.3949755 0.2920670 0.3562162 0.0000 1.0322636 1.0322636 0.5266545
## X15 5 conv 1 35 0.3856342 0.2314362 0.3640005 0.0000 0.9523810 0.9523810 0.7019027
## X16 6 ac 1 25 0.3015461 0.1634934 0.2982069 0.0000 0.7603067 0.7603067 0.8202077
## kurtosis se
## X11 -0.32529803 0.05399559
## X12 3.62662516 0.03995808
## X13 0.58774011 0.04385016
## X14 -0.74778343 0.05245678
## X15 -0.07136292 0.03911986
## X16 0.67007692 0.03269868
# observation-level min/max of the imitation/expansion proportion per
# activity (Spanish)
# mutate() + distinct() keeps rows in order of first appearance and leaves the
# result grouped by activity and language
chip_adult_sp_minmax_propimitexp <- chip %>%
  filter(language == "spanish") %>%
  group_by(activity, language) %>%
  mutate(
    min = min(prop_adult_imitexp_outof_childutt, na.rm = TRUE),
    max = max(prop_adult_imitexp_outof_childutt, na.rm = TRUE)
  ) %>%
  distinct(activity, min, max)
chip_adult_sp_minmax_propimitexp
## # A tibble: 6 × 4
## # Groups: activity, language [6]
## language activity min max
## <fct> <fct> <dbl> <dbl>
## 1 spanish ac 0 1.67
## 2 spanish conv 0 2
## 3 spanish play 0 2
## 4 spanish food 0 1
## 5 spanish routines 0 3
## 6 spanish books 0 1
# number of adult speakers per activity instance
# - extracted using speaker roles from the FREQ output
# - in an earlier script, n_speakers was calculated as the number of speakers
#   for the speaker role (child, adults) during the respective activity and
#   segment_num
# - kwalods rows are removed because these are overlapping utterances during
#   tCDS activities
# the result stays grouped by activity and bin_n_adult_speakers
num_speakers <- read_csv("./data_demo_lena_transcripts/num_speakers.csv") %>%
  filter(Speaker2 == "ADULTS", activity != "kwalods") %>%
  distinct(id, activity, segment_num, language, n_speakers) %>%
  # bin each instance: a single adult speaker vs. more than one
  mutate(
    bin_n_adult_speakers = ifelse(n_speakers > 1, "more_than1", "1_adult")
  ) %>%
  # total number of instances per activity
  add_count(activity, name = "n_instances_by_activity") %>%
  # number of instances per activity x speaker bin
  group_by(activity, bin_n_adult_speakers) %>%
  mutate(n_by_bin = n())
# one row per activity x speaker bin, with the share of that activity's
# instances falling in the bin
ungroup(num_speakers) %>%
  distinct(
    activity,
    bin_n_adult_speakers,
    n_by_bin,
    n_instances_by_activity
  ) %>%
  mutate(prop = n_by_bin / n_instances_by_activity)
## # A tibble: 14 × 5
## activity bin_n_adult_speakers n_by_bin n_instances_by_activity prop
## <chr> <chr> <int> <int> <dbl>
## 1 books 1_adult 67 84 0.798
## 2 books more_than1 17 84 0.202
## 3 play 1_adult 133 181 0.735
## 4 play more_than1 48 181 0.265
## 5 conv 1_adult 157 254 0.618
## 6 conv more_than1 97 254 0.382
## 7 routines 1_adult 91 121 0.752
## 8 routines more_than1 30 121 0.248
## 9 food 1_adult 74 115 0.643
## 10 food more_than1 41 115 0.357
## 11 ac 1_adult 215 363 0.592
## 12 ac more_than1 148 363 0.408
## 13 gemods 1_adult 259 536 0.483
## 14 gemods more_than1 277 536 0.517